package is2.parserR2;


import is2.data.Cluster;
import is2.data.DataF;
import is2.data.Edges;
import is2.data.F2SF;
import is2.data.FV;
import is2.data.Instances;
import is2.data.Long2Int;
import is2.data.Long2IntInterface;
import is2.data.MFB;
import is2.data.Parse;
import is2.data.ParseNBest;
import is2.data.PipeGen;
import is2.data.SentenceData09;
import is2.io.CONLLReader09;
import is2.io.CONLLWriter09;

import is2.tools.Tool;
import is2.util.DB;
import is2.util.OptionsSuper;
import is2.util.ParserEvaluator;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;

//import extractors.ExtractorClusterStackedR2;
import extractors.Extractor;
import extractors.ExtractorFactory;



public class Parser implements Tool {

	// Verbosity constant; main() and train() pass its negation (!MAX_INFO)
	// as the maxInfo flag of outputParses.
	private static final boolean MAX_INFO = true;

	// Number of extractor instances allocated per pipe. main() overwrites it
	// with the number of available processors, capped by options.cores.
	public static int THREADS =4;
	
	// Feature-index mapping handed to the extractors and to Pipe.fillVector.
	Long2IntInterface l2i;
	// Model weights; read by readModel or trained by train.
	ParametersFloat params;
	// Feature pipeline holding the mapping (mf), cluster (cl) and extractors.
	Pipe pipe;
	// Options this parser was configured with.
	OptionsSuper options;
	
	 // Histogram: gold rank as returned by Decoder.getGoldRank (LAS) -> number of sentences.
	 HashMap<Integer,Integer> rank = new HashMap<Integer,Integer>();
	 // Statistics accumulated by parse() and reported by outputParses().
	 int amongxbest=0, amongxbest_ula=0, nbest=0,bestProj=0, smallestErrorSum=0, countAllNodes=0;
	 // Upper bound on the number of parses kept per sentence; main() sets it from options.best.
	 static int NBest =1000;
	
	 // Factory used to create one extractor per thread.
	 ExtractorFactory extractorFactory = new ExtractorFactory(ExtractorFactory.StackedClusteredR2);
	 
	
	/**
	 * Creates a parser for the given options and loads the model they name.
	 * <p>
	 * A failure while reading the model is reported via a stack trace only;
	 * the constructor itself never throws because of it.
	 *
	 * @param options the parser configuration, including the model file name
	 */
	public Parser (OptionsSuper options) {

		this.options = options;
		this.pipe = new Pipe(options);

		// created with size 0: the actual weights are filled in by readModel
		this.params = new ParametersFloat(0);

		try {
			readModel(this.options, this.pipe, this.params);
		} catch (Exception ex) {
			ex.printStackTrace();
		}
	}


	/**
	 * Creates a parser from a model file by building an {@code Options}
	 * object from the arguments {@code -model <modelFileName>} and delegating
	 * to {@link #Parser(OptionsSuper)}.
	 *
	 * @param modelFileName The file name of the parsing model
	 */
	public Parser(String modelFileName) {
		this(new Options(new String[]{"-model",modelFileName}));
	}


	/**
	 * Creates an unconfigured parser: no options, pipe, parameters or model
	 * are set. main() uses this constructor and assigns those fields itself.
	 */
	public Parser() {
		// intentionally empty; the caller initializes the fields
	}


	/**
	 * Command line entry point: depending on the options it trains a model,
	 * parses the test file and/or evaluates the output against the gold file.
	 * <p>
	 * The executor services of {@code Decoder} and {@code Pipe} are shut down
	 * in a {@code finally} block so that they are also shut down when
	 * training or parsing fails with an exception.
	 *
	 * @param args command line arguments, interpreted by {@code Options}
	 * @throws Exception if training, model I/O, parsing or evaluation fails
	 */
	public static void main (String[] args) throws Exception
	{

		long start = System.currentTimeMillis();
		OptionsSuper options = new Options(args);

		NBest = options.best;

		DB.println("n-best"+NBest);

		// use all available cores unless a smaller positive number was requested
		Runtime runtime = Runtime.getRuntime();
		THREADS = runtime.availableProcessors();
		if (options.cores<THREADS&&options.cores>0) THREADS =options.cores;

		DB.println("Found " + runtime.availableProcessors()+" cores use "+THREADS);

		try {

			if (options.train) {

				Parser p =new Parser();
				p.options=options;

				p.l2i = new Long2Int(options.hsize);

				p.pipe =  new Pipe (options);
				Instances is = new Instances();

				p.pipe.extractor = new Extractor[THREADS];

				for (int t=0;t<THREADS;t++)  p.pipe.extractor[t]=p.extractorFactory.getExtractor( p.l2i);

				p.params = new ParametersFloat(p.l2i.size());

				if (options.useMapping!=null) {
					// temporarily point modelName at the mapping model; restored in finally
					String model = options.modelName;

					options.modelName = options.useMapping;
					DB.println("Using mapping of model "+options.modelName);
					ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName)));
					try {
						zis.getNextEntry();
						DataInputStream dis = new DataInputStream(new BufferedInputStream(zis));
						p.pipe.mf.read(dis);

						DB.println("read\n"+p.pipe.mf.toString());

						// NOTE(review): readModel/writeModell place a Cluster section between
						// the mapping and the weights; it is not read here -- confirm that the
						// mapping model really has this layout.
						// The weights are read into a throw-away object, presumably only to
						// advance the stream to the edge data.
						ParametersFloat params = new ParametersFloat(0);
						params.read(dis);

						Edges.read(dis);
					} finally {
						// closing the zip stream also closes the underlying file stream
						zis.close();
						options.modelName = model;
					}
					DB.println("end read model");
				}


				p.pipe.createInstances(options.trainfile,is);


				p.train(options, p.pipe,p.params,is,p.pipe.cl);

				p.writeModell(options, p.params, null,p.pipe.cl);

			}

			if (options.test) {

				Parser p = new Parser();
				p.options=options;

				p. pipe = new Pipe(options);
				p. params = new ParametersFloat(0);  // total should be zero and the parameters are later read

				// load the model

				p.readModel(options, p.pipe, p.params);

				DB.println("test on "+options.testfile);

				System.out.println(""+p.pipe.mf.toString());


				p.outputParses(options, p.pipe, p.params, !MAX_INFO);

			}

			System.out.println();

			if (options.eval) {
				System.out.println("\nEVALUATION PERFORMANCE:");
				ParserEvaluator.evaluate(options.goldfile, options.outfile);
			}

			// elapsed time in seconds, truncated to one decimal place
			long end = System.currentTimeMillis();
			System.out.println("used time "+((float)((end-start)/100)/10));

		} finally {
			Decoder.executerService.shutdown();
			Pipe.executerService.shutdown();
		}
		System.out.println("end.");


	}

	/**
	 * Read the model and the feature mapping from the zipped model file
	 * {@code options.modelName}.
	 * <p>
	 * The sections are read in this order: feature mapping, cluster, weights,
	 * edge data, the projective-decoding flag, the maximal form index and an
	 * optional list of training info strings which are printed to stdout.
	 * As side effects the method sets {@code pipe.cl}, {@code pipe.extractor},
	 * {@code this.l2i}, {@code options.decodeProjective} and
	 * {@code Decoder.NON_PROJECTIVITY_THRESHOLD}.
	 * <p>
	 * The model stream is closed even when reading fails.
	 *
	 * @param options supplies the model file name and the decoding threshold
	 * @param pipe    the pipe that receives mapping, cluster and extractors
	 * @param params  the parameter object the weights are read into
	 * @throws IOException if the model file cannot be opened or read
	 */
	public   void readModel(OptionsSuper options, Pipe pipe, Parameters params) throws IOException {


		DB.println("Reading data started");

		// prepare zipped reader
		ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName)));
		DataInputStream dis = new DataInputStream(new BufferedInputStream(zis));
		try {
			zis.getNextEntry();

			pipe.mf.read(dis);

			pipe.cl = new Cluster(dis);

			params.read(dis);
			this.l2i = new Long2Int(params.size());
			DB.println("parsing -- li size "+l2i.size());

			pipe.extractor = new Extractor[THREADS];

			for (int t=0;t<THREADS;t++) pipe.extractor[t]=this.extractorFactory.getExtractor(l2i);

			Edges.read(dis);

			options.decodeProjective = dis.readBoolean();

			int maxForm = dis.readInt();

			for (int t=0;t<THREADS;t++)  {
				pipe.extractor[t].setMaxForm(maxForm);
				pipe.extractor[t].initStat();
				pipe.extractor[t].init();
			}

			// The info block is optional: if it cannot be read (e.g. the stream
			// ends here) a notice is printed instead of failing.
			try {
				int icnt = dis.readInt();
				for(int i=0;i<icnt;i++) {
					System.out.println(dis.readUTF());
				}
			} catch (IOException e) {
				System.out.println("no info about training");
			}

		} finally {
			// also closes the wrapped zip and file streams
			dis.close();
		}

		DB.println("Reading data finnished");

		Decoder.NON_PROJECTIVITY_THRESHOLD =(float)options.decodeTH;
		// the extractors are initialized a second time, as in the original code
		for (int t=0;t<THREADS;t++) {
			pipe.extractor[t].initStat();
			pipe.extractor[t].init();
		}

	}



	/**
	 * Do the training: runs {@code options.numIters} passes over the
	 * instances, decodes each sentence with the current weights and updates
	 * the weights whenever the best parse contains errors.
	 * <p>
	 * After every iteration the averaged weights are computed and, if a test
	 * file is configured, used to parse and evaluate it; exceptions raised by
	 * that intermediate evaluation are printed and do not stop the training.
	 * After the last iteration the weights in {@code params} are averaged.
	 *
	 * @param options training options (iterations, maximal sentence length, ...)
	 * @param pipe    the feature pipeline with its extractors
	 * @param params  the weights to train; updated in place
	 * @param is      the training instances
	 * @param cluster the cluster data passed on to the feature extraction
	 * @throws IOException
	 * @throws InterruptedException
	 * @throws ClassNotFoundException
	 */
	public void train(OptionsSuper options, Pipe pipe, ParametersFloat params, Instances is, Cluster cluster) 
	throws IOException, InterruptedException, ClassNotFoundException {


		DB.println("\nTraining Information ");
		DB.println("-------------------- ");


		Decoder.NON_PROJECTIVITY_THRESHOLD =(float)options.decodeTH;

		if (options.decodeProjective) System.out.println("Decoding: projective");
		else System.out.println(""+Decoder.getInfo());
		int numInstances = is.size();

		// the decoder data structure is sized for the longest sentence
		int maxLenInstances =0;
		for(int i=0;i<numInstances;i++) if (maxLenInstances<is.length(i)) maxLenInstances=is.length(i);

		DataF data = new DataF(maxLenInstances, pipe.mf.getFeatureCounter().get(PipeGen.REL).shortValue());

		int iter = 0;
		int del=0;
		float error =0;
		float f1=0;

		FV pred = new FV();
		FV act = new FV();

		// counts down by one per instance over all iterations and is passed to params.update
		double	upd =  (double)(numInstances*options.numIters)+1;

		for(; iter < options.numIters; iter++) {

			System.out.print("Iteration "+iter+": ");

			long start = System.currentTimeMillis();

			long last= System.currentTimeMillis();
			error=0;
			f1=0;
			for(int n = 0; n < numInstances; n++) {

				upd--;

				// skip sentences that exceed the configured maximal length
				if (is.labels[n].length>options.maxLen) continue;

				if((n+1) %500 == 0) {
					// the progress text is only built when it is actually printed
					String progress = " td "+((Decoder.timeDecotder)/1000000F)+" tr "+((Decoder.timeRearrange)/1000000F)
					+" te "+((Pipe.timeExtract)/1000000F);
					// NOTE(review): Math.round(float) yields an int, so "/1000" is an integer
					// division and drops the decimals -- confirm whether "/1000F" was intended.
					del= PipeGen.outValueErr(n+1,Math.round(error*1000)/1000,f1/n,del, last, upd,progress);
				}

				short[] pos = is.pposs[n];

				data = pipe.fillVector((F2SF)params.getFV(), is, n, data, cluster, THREADS, l2i);

				List<ParseNBest> parses = Decoder.decode(pos,  data, options.decodeProjective,pipe.extractor[0]);
				Parse d = parses.get(0);
				double e= pipe.errors(is, n ,d);

				if (d.f1>0)f1+=(d.labels.length-1 -e) /(d.labels.length-1);

				// nothing to learn from a correctly parsed sentence
				if (e<=0) continue;

				// get predicted feature vector
				pred.clear();
				pipe.extractor[0].encodeCat(is,n,pos,is.forms[n],is.plemmas[n],d.heads, d.labels, is.feats[n],pipe.cl, pred);

				error += e;

				// get gold feature vector
				act.clear();
				pipe.extractor[0].encodeCat(is,n,pos,is.forms[n],is.plemmas[n],is.heads[n], is.labels[n], is.feats[n],pipe.cl, act);

				params.update(act, pred, is, n, d, upd,e);
			}

			String info = " td "+((Decoder.timeDecotder)/1000000F)+" tr "+((Decoder.timeRearrange)/1000000F)
			+" te "+((Pipe.timeExtract)/1000000F)+" nz "+params.countNZ();
			PipeGen.outValueErr(numInstances,Math.round(error*1000)/1000,f1/numInstances,del,last, upd,info);
			del=0;
			long end = System.currentTimeMillis();
			System.out.println(" time:"+(end-start));


			// evaluate the averaged weights of this iteration on the test file, if any
			ParametersFloat pf = params.average2((iter+1)*is.size());
			try {

				if (options.testfile!=null) {
					outputParses (options, pipe, pf, ! MAX_INFO);
					ParserEvaluator.evaluate(options.goldfile, options.outfile);
				}

			} catch (Exception ex) {
				ex.printStackTrace();
			}


			// reset the timing statistics for the next iteration
			Decoder.timeDecotder=0;Decoder.timeRearrange=0; Pipe.timeExtract=0;


		}
		params.average(iter*is.size());
	}                                   


	/**
	 * Do the parsing: reads {@code options.testfile} sentence by sentence,
	 * parses each sentence with {@link #parse} and writes the result to
	 * {@code options.outfile}. Afterwards the statistics collected by
	 * {@code parse} are printed and all statistic counters are reset, so that
	 * every call reports the figures of its own run only.
	 * <p>
	 * The writer is finished even when reading, parsing or writing fails.
	 *
	 * @param options supplies test file, output file and format
	 * @param pipe    the pipe used to read the instances
	 * @param params  the weights used for parsing
	 * @param maxInfo if true, additional information is printed to stdout
	 * @throws Exception if reading, parsing or writing fails
	 */
	 private void outputParses (OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo) throws Exception {

		long start = System.currentTimeMillis();

		CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask);
		CONLLWriter09 depWriter = new CONLLWriter09(options.outfile, options.formatTask);

		int cnt = 0;
		int del=0;
		long last = System.currentTimeMillis();

		if (maxInfo) {
			System.out.println("\nParsing Information ");
			System.out.println("------------------- ");
			if (!options.decodeProjective) System.out.println(""+Decoder.getInfo());
		}

		System.out.print("Processing Sentence: ");

		try {
			while(true) {

				Instances is = new Instances();
				is.init(1, new MFB(),options.formatTask);

				SentenceData09 instance = pipe.nextInstance(is, depReader);
				if (instance==null) break;
				cnt++;

				SentenceData09 i09 = this.parse(instance,params);

				depWriter.write(i09);
				del=PipeGen.outValue(cnt, del,last);
			}
		} finally {
			depWriter.finishWriting();
		}

		long end = System.currentTimeMillis();
		DB.println("rank\n"+rank+"\n");
		DB.println("x-best-las "+amongxbest+" x-best-ula "+amongxbest_ula+" cnt "+cnt+" x-best-las "
				+((float)((float)amongxbest/cnt))+
				" x-best-ula "+((float)((float)amongxbest_ula/cnt))+
				" nbest "+((float)nbest/cnt)+
				" 1best "+((float)(rank.get(0)==null?0:rank.get(0))/cnt)+
				" best-proj "+((float)bestProj/cnt)+
				" Sum LAS "+((float)this.smallestErrorSum/countAllNodes));

		// reset every statistic filled by parse(); smallestErrorSum and
		// countAllNodes are included so that "Sum LAS" does not accumulate
		// over several calls (e.g. once per training iteration)
		rank.clear();
		amongxbest=0;amongxbest_ula=0;
		nbest=0;
		bestProj=0;
		smallestErrorSum=0;
		countAllNodes=0;

		if (maxInfo) System.out.println("Used time " + (end-start));
		if (maxInfo) System.out.println("forms count "+Instances.m_count+" unkown "+Instances.m_unkown);

	}

	 
		/**
		 * Reads {@code options.testfile} sentence by sentence and runs
		 * {@link #parseNBest} on every sentence. The returned n-best lists are
		 * not used here and nothing is written.
		 * <p>
		 * Note that {@code parseNBest} works with the pipe and the parameters
		 * stored in this parser; the {@code params} and {@code maxInfo}
		 * arguments are not used.
		 *
		 * @param options supplies test file and format
		 * @param pipe    the pipe used to read the instances
		 * @param params  unused
		 * @param maxInfo unused
		 * @throws Exception if reading the test file fails
		 */
		 private void getNBest(OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo) throws Exception {

			CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask);

			while(true) {

				Instances is = new Instances();
				is.init(1, new MFB(),options.formatTask);

				SentenceData09 instance = pipe.nextInstance(is, depReader);
				if (instance==null) break;

				this.parseNBest(instance);
			}

		}


	 /**
	  * Parses one sentence and returns a copy of it with predicted heads and
	  * labels filled in.
	  * <p>
	  * As a side effect the method updates the statistic fields of this parser
	  * ({@code rank}, {@code amongxbest}, {@code amongxbest_ula}, {@code nbest},
	  * {@code bestProj}, {@code smallestErrorSum}, {@code countAllNodes}).
	  * <p>
	  * If the feature extraction or the decoding fails, the exception is
	  * printed and {@code null} is returned.
	  *
	  * @param instance the sentence to parse
	  * @param params   the weights used for scoring
	  * @return the parsed sentence, or {@code null} if parsing failed
	  */
	 public SentenceData09 parse (SentenceData09 instance, ParametersFloat params)   {

		// lookup table: relation index -> relation label
		String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)];
		for (Entry<String, Integer> e : MFB.getFeatureSet().get(PipeGen.REL).entrySet())  	types[e.getValue()] = e.getKey();

		Instances is = new Instances();
		is.init(1, new MFB(),options.formatTask);
		new CONLLReader09().insert(is, instance);

		String[] forms = instance.forms;

		DataF d2;
		try {
			d2 = pipe.fillVector(params.getFV(), is,0,null,pipe.cl,THREADS,l2i);
		} catch (Exception e ) {
			e.printStackTrace();
			return null;
		}
		short[] pos = is.pposs[0];

		List<ParseNBest> parses;
		Parse d;
		try {
			parses =Decoder.decode(pos,d2,options.decodeProjective,pipe.extractor[0]);
			d = parses.get(0);
		}catch (Exception e) {
			// without a parse the code below could only fail with a
			// NullPointerException; report the failure like the one above
			e.printStackTrace();
			return null;
		}

		if (parses.size()>NBest) parses = parses.subList(0,NBest);

		int g_las = Decoder.getGoldRank(parses, is,0,Decoder.LAS);
		int g_ula = Decoder.getGoldRank(parses, is,0,!Decoder.LAS);

		int smallest = Decoder.getSmallestError(parses, is,0,!Decoder.LAS);
		smallestErrorSum+=is.length(0)-smallest;
		countAllNodes+=is.length(0);

		if (g_las>=0) amongxbest++;
		if (g_ula>=0) amongxbest_ula++;

		nbest+=parses.size();

		Integer r = rank.get(g_las);
		if (r==null) rank.put(g_las, 1);
		else rank.put(g_las, r+1);

		// the result is not used here; the call is kept because errors() may
		// update fields of the parse -- TODO confirm
		this.pipe.errors(is,0, d);

		float errBestProj = (float)this.pipe.errors(is,0, Decoder.bestProj);

		if (errBestProj==0) bestProj++;

		SentenceData09 i09 = new SentenceData09(instance);

		i09.createSemantic(instance);

		// index 0 of the parse is skipped, so parse position j+1 maps to output position j
		for(int j = 0; j < forms.length-1; j++) {
			i09.plabels[j] = types[d.labels[j+1]];
			i09.pheads[j] = d.heads[j+1];
		}
		return i09;

	}

	 /**
	  * Decodes one sentence with the parameters stored in this parser and
	  * returns the list of parses delivered by the decoder, cut to at most
	  * {@code NBest} entries.
	  * <p>
	  * If the feature extraction or the decoding fails, the exception is
	  * printed and {@code null} is returned.
	  *
	  * @param instance the sentence to parse
	  * @return at most {@code NBest} parses, or {@code null} if parsing failed
	  */
	 public List<ParseNBest> parseNBest (SentenceData09 instance)   {

		Instances is = new Instances();
		is.init(1, new MFB(),options.formatTask);
		new CONLLReader09().insert(is, instance);

		DataF d2;
		try {
			d2 = pipe.fillVector(params.getFV(), is,0,null,pipe.cl,THREADS, l2i);
		} catch (Exception e ) {
			e.printStackTrace();
			return null;
		}
		short[] pos = is.pposs[0];

		List<ParseNBest> parses;
		try {
			parses =Decoder.decode(pos,d2,options.decodeProjective,pipe.extractor[0]);
		}catch (Exception e) {
			// previously execution continued and failed with a NullPointerException below
			e.printStackTrace();
			return null;
		}

		// guards against a decoder that returns no list instead of throwing
		if (parses==null) return null;

		if (parses.size()>NBest) parses = parses.subList(0,NBest);

		return parses;

	}
	
	

	/* (non-Javadoc)
	 * Wraps the sentence with a root node, parses it with the parameters of
	 * this parser and returns the result; null is returned when parsing
	 * throws, in which case the stack trace is printed.
	 *
	 * NOTE(review): both executor services are shut down at the end of every
	 * call -- confirm that apply is meant to be used only once per process.
	 *
	 * @see is2.tools.Tool#apply(is2.data.SentenceData09)
	 */

	@Override
	public SentenceData09 apply(SentenceData09 snt09) {

		SentenceData09 rooted = new SentenceData09();
		rooted.createWithRoot(snt09);

		SentenceData09 parsed = null;
		try {
			parsed = parse(rooted, this.params);
		} catch (Exception ex) {
			ex.printStackTrace();
		}

		Decoder.executerService.shutdown();
		Pipe.executerService.shutdown();

		return parsed;
	}
	
	/**
	 * Write the parsing model as a zip file with the single entry "data".
	 * <p>
	 * The sections are written in the order in which {@code readModel} reads
	 * them: feature mapping, cluster, weights, edge data, the
	 * projective-decoding flag, the maximal form index and five info strings.
	 * The output stream is closed even when writing fails.
	 *
	 * @param options   supplies the model name and the values of the info strings
	 * @param params    the weights to store
	 * @param extension appended to the model file name; may be null for no extension
	 * @param cs        the cluster data to store
	 * @throws FileNotFoundException if the model file cannot be created
	 * @throws IOException if writing fails
	 */
	private void writeModell(OptionsSuper options, ParametersFloat params, String extension, Cluster cs) throws FileNotFoundException, IOException {

		String name = extension==null?options.modelName:options.modelName+extension;
		ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(name)));
		DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos));
		try {
			zos.putNextEntry(new ZipEntry("data"));

			MFB.writeData(dos);
			cs.write(dos);

			params.write(dos);

			Edges.write(dos);

			dos.writeBoolean(options.decodeProjective);

			dos.writeInt(pipe.extractor[0].getMaxForm());

			dos.writeInt(5);  // Info count
			dos.writeUTF("Used parser   "+Parser.class.toString());
			dos.writeUTF("Creation date "+(new SimpleDateFormat("yyyy.MM.dd HH:mm:ss")).format(new Date()));
			dos.writeUTF("Training data "+options.trainfile);
			dos.writeUTF("Iterations    "+options.numIters+" Used sentences "+options.count);
			dos.writeUTF("Cluster       "+options.clusterFile);

			dos.flush();
		} finally {
			// closes the zip stream and the underlying file as well
			dos.close();
		}
	}




}
